## Warning: package 'RPANDA' was built under R version 3.5.2
## Warning: package 'picante' was built under R version 3.5.2
## Warning: package 'ape' was built under R version 3.5.2
## Warning: package 'vegan' was built under R version 3.5.2
## Warning: package 'permute' was built under R version 3.5.2
## Warning: package 'nlme' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'geiger' was built under R version 3.5.2
## Warning: package 'phytools' was built under R version 3.5.2
## Warning: package 'mvtnorm' was built under R version 3.5.2
## Warning: package 'ggplot2' was built under R version 3.5.2
Now read in the tree files we’ll be working with
#min.tree <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Macro_MinAges_CON.tre")
#mean.tree <- read.nexus("/PATH/Macro_MeanAges_CON.tre")
#max.tree <- read.nexus("/PATH/Macro_MaxAges_CON.tre")
#cp.min <- read.nexus("/PATH/Macro_CP_MinAges_CON.tre")
#cp.mean <- read.nexus("/PATH/Macro_CP_EstAges_CON.tre")
#cp.max <- read.nexus("/PATH/Macro_CP_MaxAges_CON.tre")
sampled.trees <- read.tree("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/REAL_Run5_AllSchemes_508trees_Macropodinae.trees")
empirical.trees <- read.tree("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/MODEL110_Sampled_Run2_Macropodinae.trees")
fossil.trees <- read.tree("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/REAL_Run4_Fossil_519trees_Macropodinae.trees")
consensus.tree <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/REAL_Macropodinae_ALL/Run5_Combined123_CON.tre")
Choose the current tree we want to work with
tree <- consensus.tree
And the hypsodonty data
# raw data for all the macropodoid taxa
all.HI <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Data/CrownHeight_Data.csv", header=T)
# or just species means for the Macropodinae
hypsodonty.index <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Data/CrownHeight_Macropodinae_spMEANS.csv", header=T)
head(hypsodonty.index)
## Taxon HI HI_Source Diet_Guild
## 1 Baringa_nelsonensis 1.3259480 Couzens & Prideaux 2018 Browser
## 2 Baringa_sp_indet 1.1395682 Couzens & Prideaux 2018 Browser
## 3 Bohra_bandharr 1.0119760 Couzens & Prideaux 2018 <NA>
## 4 Bohra_illuminata 0.9373134 Couzens & Prideaux 2018 <NA>
## 5 Bohra_nullarbora 1.0722913 Couzens & Prideaux 2018 <NA>
## 6 Bohra_sp_indet 1.2234637 Couzens & Prideaux 2018 <NA>
## Guild_Source
## 1 Dawson 2006
## 2 Dawson 2006
## 3
## 4
## 5
## 6
The C4 plant reconstruction data from Andrae
enviro.data <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Andrae_S1.csv", header=T)
grass.data <- enviro.data[1:25, c(1,3)]
# this will also remove the last sample (from 9.5 mya) which is spurious, and keep just the mean estimate
head(enviro.data); head(grass.data)
## Age Age_Error C4_recon_mean C4_recon_lower C4_recon_upper
## 1 1.0 0.002 59.2 36.7 81.6
## 2 2.0 0.001 36.6 11.8 61.3
## 3 2.5 0.008 36.0 11.2 60.8
## 4 2.8 0.005 35.4 10.5 60.2
## 5 2.8 0.013 21.8 0.0 48.1
## 6 3.0 0.005 38.8 14.3 63.3
## Age C4_recon_mean
## 1 1.0 59.2
## 2 2.0 36.6
## 3 2.5 36.0
## 4 2.8 35.4
## 5 2.8 21.8
## 6 3.0 38.8
And the dust flux data from Andrae
flux.data <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Aeolian_Flux.csv", header=T)
head(flux.data)
## Age A_Flux
## 1 0.05931646 108.3949
## 2 0.10874684 113.8622
## 3 0.20760760 118.6517
## 4 0.25703797 121.9088
## 5 0.35589873 109.4722
## 6 0.40532911 116.3859
As well as the paleotemperature data
data(InfTemp)
head(InfTemp)
## Age Temperature
## 1 0.000 3.902176
## 2 0.000 2.900296
## 3 0.002 4.309984
## 4 0.002 5.172534
## 5 0.004 3.733446
## 6 0.004 4.309984
Trim tree and data down to overlapping taxa
# extract the taxa that are in both the tree and
overlaps <- intersect(tree$tip.label, unique(hypsodonty.index$Taxon))
macro.tree <- drop.tip(tree, setdiff(tree$tip.label, overlaps))
#macro.tree <- lapply(tree, drop.tip, tip=tip.drops) # if you're using a set of trees (fossil, sampled)
trim.data <- dplyr::filter(hypsodonty.index, Taxon %in% overlaps)
macro.HI <- trim.data[,2]; names(macro.HI) <- trim.data[,1]; geiger::name.check(macro.tree, macro.HI)
## [1] "OK"
macro.HI
## Baringa_nelsonensis Bohra_illuminata
## 1.3259480 0.9373134
## Dendrolagus_bennettianus Dendrolagus_dorianus
## 1.0115887 0.9400000
## Dendrolagus_goodfellowi Dendrolagus_inustus
## 0.8500000 0.8899909
## Dendrolagus_lumholtzi Dendrolagus_matschiei
## 0.9342720 0.9100000
## Dorcopsis_hageni Dorcopsis_veterum
## 0.8800000 1.0200000
## Dorcopsoides_fossilis Dorcopsulus_vanheurni
## 0.7554314 0.9793040
## Kurrabi_mahoneyi Lagorchestes_conspicillatus
## 1.4331210 1.1378107
## Lagorchestes_hirsutus Lagostrophus_fasciatus
## 1.2047786 1.1600000
## Macropus_agilis Macropus_antilopinus
## 1.1692005 1.2300000
## Macropus_eugenii Macropus_fuliginosus
## 1.1109478 1.3630443
## Macropus_giganteus Macropus_irma
## 1.3300000 1.1372544
## Macropus_parma Macropus_parryi
## 1.3176042 1.3475744
## Macropus_pavana Macropus_robustus
## 1.1823511 1.2418003
## Macropus_rufogriseus Macropus_rufus
## 1.3500000 1.3978825
## Onychogalea_fraenata Onychogalea_unguifera
## 1.5000000 1.2694731
## Peradorcas_concinna Petrogale_assimilis
## 1.2222222 1.1667479
## Petrogale_brachyotis Petrogale_inornata
## 1.0074257 1.6500000
## Petrogale_lateralis Petrogale_penicillata
## 1.2076257 1.2858835
## Petrogale_purpureicollis Petrogale_rothschildi
## 1.8200000 1.4000000
## Petrogale_xanthopus Prionotemnus_palankarinnicus
## 1.2100000 1.0221543
## Protemnodon_anak Setonix_brachyurus
## 1.0869565 1.0196716
## Thylogale_billardierii Thylogale_brunii
## 1.1231331 1.0000000
## Thylogale_stigmatica Thylogale_thetis
## 1.1895715 1.1652379
## Wallabia_bicolor
## 1.1218826
If you’re working with the raw data, trim tree and data down to just Macropodinae
macros <- dplyr::filter(all.HI, Higher_tax == "Macropodinae")
overlaps <- intersect(tree$tip.label, unique(all.HI$Taxon))
trim.tree <- drop.tip(tree, setdiff(tree$tip.label, overlaps))
trim.raw <- filter(macros, Taxon %in% overlaps)
create a tibble to get the species means (if you haven’t done this already)
library(dplyr)
sp.means <- trim.raw %>%
group_by(Taxon) %>%
summarise_at(vars(H_HYPCD/PW), mean)
#write.csv(sp.means, row.names=FALSE, file="/PATH/CrownHeight_Macropodinae_spMEANS.csv") # uncomment to write the file
Let’s quickly visualize the data in a few different ways to get an idea of what’s going on
# Barplot of trait value
plotTree.barplot(macro.tree, macro.HI, args.barplot=list(beside=TRUE, border=F))
# Continuous trait map
obj1 <- contMap(macro.tree, macro.HI, plot=FALSE, outline=F);
n<-length(obj1$cols);
obj1$cols[1:n] <- rev(colorRampPalette(brewer.pal(9, "YlGnBu"))(n));
plot(obj1,legend=0.7*max(nodeHeights(obj1$tree)),
fsize=c(0.7,0.9), lwd=5, border=F); axisPhylo(1, backward=T)
hidata <- hypsodonty.index[complete.cases(hypsodonty.index[,c(1,2,4)]),]
hidata <- hidata[order(hidata$HI),]
hidata$Taxon <- factor(hidata$Taxon, levels = hidata$Taxon)
ggplot(hidata, aes(x=Taxon, y=HI, fill=Diet_Guild), colour=brewer.pal(3,"Paired")) +
geom_col() + theme_classic() + coord_flip() +
scale_x_discrete(limits = rev(hidata$Taxon)) +
scale_fill_brewer(palette="Paired", "Diet_Guild")
Now we can look at the time-sampled variables
# make a plot of the grass data
pp <- ggplot(enviro.data[1:25,], aes(Age)) +
geom_ribbon(aes(ymin = C4_recon_lower, ymax = C4_recon_upper), fill = "light green") +
geom_line(aes(y = C4_recon_mean), color="DarkGreen") + scale_x_reverse() + theme_classic() +
coord_cartesian(xlim = c(0, 20), ylim = c(0,80), expand = FALSE)
#qq <- gggeo_scale(pp, dat="epochs") # if you want to plot a geological timescale
# make a plot of the flux data
rr <- ggplot(flux.data, aes(Age)) +
geom_ribbon(aes(ymin = A_Flux-35, ymax = A_Flux+35), fill = "light blue") +
geom_line(aes(y = A_Flux), color="DarkBlue") + scale_x_reverse() + theme_classic() +
coord_cartesian(xlim = c(0, 20), ylim = c(0,150), expand = FALSE)
#ss <- gggeo_scale(rr, dat="epochs") # if you want to plot a geological timescale
grid.arrange(pp, rr, nrow=1)
Correlative models like the environmental models in RPANDA will be sensitive to the amount of smoothing to the trend line of the input data (see Clavel & Morlon, PNAS). To address this, we’ll create a function that searches for the optimum smoothness of the trend by fitting a set of values.
best.smoothing <- function (phy, trait.data, time.data=InfTemp, degrees=c(0,10,20,30,40,50), model="EnvExp", cores=6) {
res.list <- mclapply(1:length(degrees), function(x) {
fit_t_env(phy, trait.data, env_data=time.data, df=degrees[x], scale=F, plot=T, model=model)}, mc.cores = cores)
for(i in 1:length(res.list)){res.list[[i]]$df <- degrees[i]}
res.values <- unlist(lapply(res.list, function(x) x$aicc)) # make a vector of the values, so we can get the index number of the best
best.res <- res.list[[which.min(res.values)]]
plot(best.res, main=paste(model, "; AICc = ", round(best.res$aicc,2)), sub=paste("sigma = ",round(best.res$param[1],2), " beta = ",round(best.res$param[2],2), " df = ", best.res$df), col="red")
return(list(all.results=res.list, best.result=best.res, best.df=best.res$df))
}
We can have a look at what this smoothing actually does to our data. We can come back to look at these once we get the optimum fits for our models.
grass.spline0 <- sm.spline(x=grass.data$Age, y=grass.data$C4_recon_mean, df=0)
grass.spline10 <- sm.spline(x=grass.data$Age, y=grass.data$C4_recon_mean, df=10)
grass.spline20 <- sm.spline(x=grass.data$Age, y=grass.data$C4_recon_mean, df=20)
grass.spline30 <- sm.spline(x=grass.data$Age, y=grass.data$C4_recon_mean, df=30)
grass.spline40 <- sm.spline(x=grass.data$Age, y=grass.data$C4_recon_mean, df=40)
grass.spline50 <- sm.spline(x=grass.data$Age, y=grass.data$C4_recon_mean, df=50)
plot(grass.data, main="C4 Grass Reconstruction Through Time", xlab="Millions of Years Ago", ylab="Percent C4")
lines(grass.spline0, col="red", lwd=4)
lines(grass.spline10, col="green", lwd=4)
lines(grass.spline20, col="blue", lwd=4)
lines(grass.spline30, col="yellow", lwd=4)
lines(grass.spline40, col="violet", lwd=4)
lines(grass.spline50, col="gold", lwd=4)
flux.spline0 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=0)
flux.spline10 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=10)
flux.spline20 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=20)
flux.spline30 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=30)
flux.spline40 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=40)
flux.spline50 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=50)
plot(flux.data, main="Aeolian Dust Flux Through Time", xlab="Millions of Years Ago", ylab="Aeolian Dust Flux")
lines(flux.spline0, col="red", lwd=4)
lines(flux.spline10, col="green", lwd=4)
lines(flux.spline20, col="blue", lwd=4)
lines(flux.spline30, col="yellow", lwd=4)
lines(flux.spline40, col="magenta", lwd=4)
lines(flux.spline50, col="cyan", lwd=4)
data(InfTemp)
env.spline0 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=0)
env.spline10 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=10)
env.spline20 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=20)
env.spline30 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=30)
env.spline40 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=40)
env.spline50 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=50)
plot(InfTemp, main="Paleotemperature Through Time")
lines(env.spline0, col="red")
lines(env.spline10, col="green")
lines(env.spline20, col="blue")
lines(env.spline30, col="yellow")
lines(env.spline40, col="magenta")
lines(env.spline50, col="cyan")
Start with the environmental model of paleotemperature. You can designate the number of cores and the amount of smoothing
# linear model first
ENVexp <- best.smoothing(macro.tree, macro.HI, time.data=InfTemp,
degrees=c(10,20,30,40,50), model="EnvExp", cores=5)
# exponential model next
ENVlin <- best.smoothing(macro.tree, macro.HI, time.data=InfTemp,
degrees=c(10,20,30,40,50), model="EnvLin", cores=5)
Next up the grass model, using C4 reconstructions.
GRASSexp <- best.smoothing(macro.tree, macro.HI, time.data=grass.data,
degrees=c(10,20,30,40,50), model="EnvExp", cores=5)
GRASSlin <- best.smoothing(macro.tree, macro.HI, time.data=grass.data,
degrees=c(10,20,30,40,50), model="EnvLin", cores=5)
And finally the flux models, using aeolian dust measurements.
FLUXexp <- best.smoothing(macro.tree, macro.HI, time.data=flux.data,
degrees=c(10,20,30,40,50), model="EnvExp", cores=5)
FLUXlin <- best.smoothing(macro.tree, macro.HI, time.data=flux.data,
degrees=c(10,20,30,40,50), model="EnvLin", cores=5)
Lastly, for comparison, run a few standard models. These are Brownian Motion, Brownian Motion with a Trend, and Early Burst.
BM_res <- fitContinuous(macro.tree, macro.HI, model="BM")
trend_res <- fitContinuous(macro.tree, macro.HI, model="trend")
EB_res <- fitContinuous(macro.tree, macro.HI, model="EB")
## Warning in fitContinuous(macro.tree, macro.HI, model = "EB"): Parameter estimates appear at bounds:
## a
Compare the models with AICc, and check differences across the trees
model_FIT <- c(ENVexp$best.result$aicc, ENVlin$best.result$aicc,
GRASSlin$best.result$aicc, GRASSexp$best.result$aicc,
FLUXexp$best.result$aicc, FLUXlin$best.result$aicc,
BM_res$opt$aicc, trend_res$opt$aicc, EB_res$opt$aicc);
names(model_FIT) <- c("ENVexp", "ENVlin", "GRASSlin", "GRASSexp", "FLUXexp", "FLUXlin", "BM", "Trend", "EB")
aic.w(model_FIT)
## ENVexp ENVlin GRASSlin GRASSexp FLUXexp FLUXlin
## 0.01949302 0.00000000 0.12961408 0.82510494 0.01154649 0.00672102
## BM Trend EB
## 0.00192663 0.00497934 0.00061449
fit.aic <- as.data.frame(as.vector(aic.w(model_FIT))); fit.aic$model <- names(model_FIT); colnames(fit.aic) <- c("aiccw", "model"); fit.aic$age <- "Consensus Tree"
Quickly collapse models from the same data
fit.aic$model.type <- c("ENV", "ENV", "GRASS", "GRASS", "FLUX", "FLUX", "BM", "Trend", "EB")
fit.aic$model.type <- factor(fit.aic$model.type, levels=c("ENV", "FLUX", "GRASS", "BM", "EB", "Trend"))
Then plot the model fits as AICc weights
library(wesanderson)
(ggplot(fit.aic)
+ geom_bar(aes(y=aiccw, x=age, fill=model.type), stat="identity")
#+ theme(axis.text.x=element_text(angle=25, hjust=1), panel.background=element_blank(), legend.position="bottom")
+ theme_classic()
+ scale_fill_manual( values=wes_palette("Zissou1", 6, "continuous")))
Ok, now that we’ve fit the models to a given tree, we want to fit the models to lots of trees of different ages, shapes, etc. Let’s read in a file I’ve made of trees from the whole span of dating analyses.
tree.span <- read.tree("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Tree_Span.tre")
tree.span
Now we’ll run a loop across all of these trees, to fit the models to each one. It may take a little while.
# MAKE THIS EVAL=TRUE IF YOU WANT TO DO THIS BIT FOR REAL
tree.span <- fossil.trees
mean.data <- fossil.HI
# Fit all the models to a series of trees!
all.aics <- NULL; all.results <- NULL; timer <- progress_estimated(length(tree.span))
for (k in 1:length(tree.span)){
int.results <- NULL
# Fit a number of models to the data (ENV, GRASS, BM, EB, Trend, Drift)
ENVexp <- best.smoothing(tree.span[[k]], mean.data, time.data=InfTemp,
degrees=c(10,20,30,40,50), model="EnvExp", cores=5);
int.results[["ENVexp"]] <- ENVexp$best.result;
ENVlin <- best.smoothing(tree.span[[k]], mean.data, time.data=InfTemp,
degrees=c(10,20,30,40,50), model="EnvLin", cores=5);
int.results[["ENVlin"]] <- ENVlin$best.result;
GRASSexp <- best.smoothing(tree.span[[k]], mean.data, time.data=grass.data,
degrees=c(30,40,50), model="EnvExp", cores=3);
int.results[["GRASSexp"]] <- GRASSexp$best.result;
GRASSlin <- best.smoothing(tree.span[[k]], mean.data, time.data=grass.data,
degrees=c(30,40,50), model="EnvLin", cores=3);
int.results[["GRASSlin"]] <- GRASSlin$best.result;
FLUXexp <- best.smoothing(tree.span[[k]], mean.data, time.data=flux.data,
degrees=c(10,20,30,40,50), model="EnvExp", cores=5);
int.results[["FLUXexp"]] <- FLUXexp$best.result;
FLUXlin <- best.smoothing(tree.span[[k]], mean.data, time.data=flux.data,
degrees=c(10,20,30,40,50), model="EnvLin", cores=5);
int.results[["FLUXlin"]] <- FLUXlin$best.result;
BM_res <- fitContinuous(tree.span[[k]], mean.data, model="BM");
int.results[["BM"]] <- BM_res
trend_res <- fitContinuous(tree.span[[k]], mean.data, model="trend");
int.results[["Trend"]] <- trend_res
EB_res <- fitContinuous(tree.span[[k]], mean.data, model="EB");
int.results[["EB"]] <- EB_res
curr_tree_FIT <- c(ENVexp$best.result$aicc, ENVlin$best.result$aicc,
GRASSexp$best.result$aicc, GRASSlin$best.result$aicc,
FLUXexp$best.result$aicc, FLUXlin$best.result$aicc,
BM_res$opt$aicc, trend_res$opt$aicc, EB_res$opt$aicc);
names(curr_tree_FIT) <- c("ENVexp", "ENVlin", "GRASSexp", "GRASSlin",
"FLUXexp", "FLUXlin", "BM", "Trend", "EB")
curr_tree_SIG <- c(ENVexp$best.result$param[[1]], ENVlin$best.result$param[[1]],
GRASSexp$best.result$param[[1]], GRASSlin$best.result$param[[1]],
FLUXexp$best.result$param[[1]], FLUXlin$best.result$param[[1]],
BM_res$opt$sigsq, trend_res$opt$sigsq, EB_res$opt$sigsq);
names(curr_tree_SIG) <- c("ENVexp", "ENVlin", "GRASSexp", "GRASSlin",
"FLUXexp", "FLUXlin", "BM", "Trend", "EB")
curr_tree_PAR <- c(ENVexp$best.result$param[[2]], ENVlin$best.result$param[[2]],
GRASSexp$best.result$param[[2]], GRASSlin$best.result$param[[2]],
FLUXexp$best.result$param[[2]], FLUXlin$best.result$param[[2]],
NA, trend_res$opt$slope, EB_res$opt$a);
names(curr_tree_PAR) <- c("ENVexp", "ENVlin", "GRASSexp", "GRASSlin",
"FLUXexp", "FLUXlin", "BM", "Trend", "EB")
curr.aic <- as.data.frame(as.vector(aic.w(curr_tree_FIT)));
curr.aic$model <- names(curr_tree_FIT); colnames(curr.aic) <- c("aiccw", "model");
curr.aic$age <- round(max(nodeHeights(tree.span[[k]])), 4)
curr.aic$tree <- k
curr.aic$sigsq <- curr_tree_SIG
curr.aic$par <- curr_tree_PAR
all.results[[k]] <- int.results
curr.aic$model.type <- c("ENV", "ENV", "GRASS", "GRASS",
"FLUX", "FLUX", "BM", "Trend", "EB")
all.aics <- rbind.data.frame(all.aics, curr.aic);
print(timer$tick())
}
Save the file externally:
saveRDS(all.aics, file="/PATH/Fossil_Trees_Model_Fitting_AICCs.RDS")
saveRDS(all.results, file="/PATH/Fossil_Trees_Model_Fitting_Results.RDS")
#saveRDS(all.aics, file="/PATH/Model_Fitting_AICCs.RDS")
Or skip the work and read in the file instead:
all.aics <- readRDS("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Data/FINAL_SAMPLED_Model_Fitting_AICCs.RDS")
all.aics$model.type <- factor(all.aics$model.type, levels=c("ENV", "FLUX", "GRASS", "BM", "EB", "Trend"))
all.aics$age <- as.factor(all.aics$age)
sampled.res <- (ggplot(all.aics)
+ geom_bar(aes(y=aiccw, x=age, fill=model.type), stat="identity")
+ theme(axis.text.x=element_text(angle=90, hjust=1), panel.background=element_blank(), legend.position="none")
+ scale_fill_manual( values=wes_palette("Zissou1", 6, "continuous")))
sampled.res
And then summarize the model support (average AICcWt) across all trees
# Summarize the model support (average AICcWt)
aic.sum <- summarySE(all.aics, measurevar="aiccw", groupvars="model")
aic.sum$model <- factor(aic.sum$model, levels=c("ENVlin", "ENVexp", "FLUXlin", "FLUXexp", "GRASSexp", "GRASSlin", "BM", "EB", "Trend"))
aic.sum$model.type <- c("BM", "EB", "ENV", "ENV", "FLUX", "FLUX", "GRASS", "GRASS", "Trend")
aic.sum$model.type <- factor(aic.sum$model.type, levels=c("ENV", "FLUX", "GRASS", "BM", "EB", "Trend"))
fossil.bar <- (ggplot(aic.sum, aes(x=model.type, y=aiccw, fill=model.type))
+ geom_bar(stat="identity")
#+ geom_errorbar(aes(ymin=aiccw-se, ymax=aiccw+se), size=0.3, width=0.2)
+ scale_fill_manual( values=wes_palette("Zissou1", 6, "continuous"))
+ theme_classic()
+ theme(legend.position="none")
#+ facet_wrap(~group, nrow=3, ncol=2)
+ geom_text(aes(label=percent(aiccw), vjust=-4)))
fossil.bar
We can also visualize the estimated evolutionary rates of the trait.
First create an adjusted version of the plotting function from RPANDA:
# Adjust the RPANDA plotting function so we can fix the axes, and do a bunch of plots
plot.fixed_t_env <- function (x, steps = 100,
xlim=c(-10,0), ylim=c(0,1), linecol="red", ...)
{
if (is.function(x$model)) {
fun_temp <- function(x, temp, model, param) {
rate_fun <- function(x) {
model(x, temp, param)
}
rate <- rate_fun(x)
return(rate)
}
}
else if (x$model == "EnvExp") {
fun_temp <- function(x, temp, model, param) {
sig <- param[1]
beta <- param[2]
rate <- (sig * exp(beta * temp(x)))
return(rate)
}
}
else if (x$model == "EnvLin") {
fun_temp <- function(x, temp, model, param) {
sig <- param[1]
beta <- param[2]
rate <- sig + (beta - sig) * temp(x)
return(rate)
}
}
t <- seq(0, x$tot_time, length.out = steps)
rates <- fun_temp(x = t, temp = x$env_func, model = x$model,
param = x$param)
plot(-t, rates, type = "l", xlab = "Times",
ylab = bquote(paste("Evolutionary rates ",sigma)),
xlim=xlim, ylim=ylim, col=linecol, ...)
results <- list(time_steps = t, rates = rates)
invisible(results)
}
I’ll plot just the results from the GRASS-linear model, but you could do this for all.
all.aics <- readRDS("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Data/FINAL_SAMPLED_Model_Fitting_AICCs.RDS")
all.grass.results <- readRDS("/Users/Ian/Desktop/REAL_SAMPLED_GRASS_Model_Fitting_AllResults.RDS")
sub.aic <- filter(all.aics, model=="GRASSlin" & aiccw > 0.5); sub.trees <- sub.aic$tree; print(paste(length(sub.trees), "estimates"))
## [1] "111 estimates"
for(i in sub.trees){plot.fixed_t_env(all.grass.results[[i]]$GRASSlin,
xlim=c(-12,0), ylim=c(0,0.2),
linecol=randomcoloR::randomColor(1));
par(new=T)}
Also plot the beta value of the correlation between rate and the variable
# And plot the beta values for preferred models
col.pal <- brewer.pal(8, "Paired")
beta.values <- filter(all.aics, model %in% c("GRASSexp", "FLUXexp", "GRASSlin", "ENVexp") & aiccw >= 0.3); #beta.values <- filter(beta.values, par<70 & par>-25)
ge_beta <- filter(beta.values, model=="GRASSexp"); ge <- ggplot(ge_beta, aes(x=model, y=par)) + geom_violin(fill=col.pal[[4]]) + theme_classic() + geom_boxplot(fill="white", width=0.075) #+ geom_dotplot(binaxis="y", dotsize=0.1, binwidth=5)
gl_beta <- filter(beta.values, model=="GRASSlin"); gl <- ggplot(gl_beta, aes(x=model, y=par)) + geom_violin(fill=col.pal[[3]]) + theme_classic() + geom_boxplot(fill="white", width=0.075) #+ geom_dotplot(binaxis="y", dotsize=0.1, binwidth=5)
fe_beta <- filter(beta.values, model=="FLUXexp" ); fe <- ggplot(fe_beta, aes(x=model, y=par)) + geom_violin(fill=col.pal[[1]]) + theme_classic() + geom_boxplot(fill="white", width=0.075) #+ geom_dotplot(binaxis="y", dotsize=0.1, binwidth=5)
ee_beta <- filter(beta.values, model=="ENVexp" ); ee <- ggplot(ee_beta, aes(x=model, y=par)) + geom_violin(fill=col.pal[[2]]) + theme_classic() + geom_boxplot(fill="white", width=0.075) #+ geom_dotplot(binaxis="y", dotsize=0.1, binwidth=5)
gridExtra::grid.arrange(gl, ge, fe, ee, nrow=1)
Make a quick function to get the DEPTH of a node (from present), instead of the HEIGHT (from root)
MRCA.depth <- function(phy){max(nodeHeights(phy)) - findMRCA(phy, tips=c("Macropus_irma", "Wallabia_bicolor"), type="height")}
Prepare trees for comparison of ages
# Prepare trees for comparison of ages
max.age.trees <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/REAL_Macropodinae_ALL/Alternative_Dating_Schemes/Max_Run_Combined1234.trees");
max.ages <- max.age.trees[sample(1:length(max.age.trees),300)]
#max.ages <- lapply(max.age.trees, drop.tip, tip=setdiff(max.age.trees[[1]]$tip.label, overlaps)); class(max.ages) <- "multiPhylo";
age.max <- as.data.frame(unlist(lapply(max.ages, MRCA.depth)));
age.max$tree <- "fixed.max"; colnames(age.max) <- c("age", "tree")
mean.age.trees <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/REAL_Macropodinae_ALL/Alternative_Dating_Schemes/Mean_Run_Combined1234.trees");
mean.ages <- mean.age.trees[sample(1:length(mean.age.trees),300)]
#mean.ages <- lapply(mean.age.trees, drop.tip, tip=setdiff(mean.age.trees[[1]]$tip.label, overlaps)); class(mean.ages) <- "multiPhylo"
age.mean <- as.data.frame(unlist(lapply(mean.ages, MRCA.depth)));
age.mean$tree <- "fixed.mean"; colnames(age.mean) <- c("age", "tree")
min.age.trees <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/REAL_Macropodinae_ALL/Alternative_Dating_Schemes/Min_Run_Combined1234.trees"); min.ages <- min.age.trees[sample(1:length(min.age.trees),300)]
#min.ages <- lapply(min.age.trees, drop.tip, tip=setdiff(min.age.trees[[1]]$tip.label, overlaps)); class(min.ages) <- "multiPhylo"
age.min <- as.data.frame(unlist(lapply(min.ages, MRCA.depth)));
age.min$tree <- "fixed.min"; colnames(age.min) <- c("age", "tree")
sampled500.trees <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/REAL_Macropodinae_ALL/Run5_Combined123.trees");
sampled.ages <- sampled500.trees[sample(1:length(sampled500.trees),300)]
sampled500.trees <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/REAL_Macropodinae_ALL/Run5_Combined123.trees");
sampled.ages <- sampled500.trees[(length(sampled500.trees)-200):length(sampled500.trees)]
#min.ages <- lapply(min.age.trees, drop.tip, tip=setdiff(min.age.trees[[1]]$tip.label, overlaps)); class(min.ages) <- "multiPhylo"
age.sampled <- as.data.frame(unlist(lapply(sampled.ages, MRCA.depth))); age.sampled$tree <- "est.prior"; colnames(age.sampled) <- c("age", "tree")
Combine the ages from different dating schemes, and plot them
age.all <- rbind(age.min, age.sampled, age.mean, age.max)
age.all$tree <- factor(age.all$tree, levels=c("fixed.max","est.prior", "fixed.mean", "fixed.min"))
(ggplot(age.all, aes(x=age, fill=tree))
+ geom_density(alpha=0.75, adjust=1.5)
+ theme(axis.text.x=element_text(angle=0, hjust=1), panel.background=element_blank(), legend.position="bottom")
+ scale_fill_manual(values=wes_palette("Zissou1", type="continuous", 4))
+ scale_x_reverse(lim=c(8,0)))
prior.trees <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/REAL_Macropodinae_ALL/Alternative_Dating_Schemes_PriorOnly/Mean_1/All_Macropodinae_FBD_Relax_PRIOR.trees");
prior.trees <- prior.trees[(length(prior.trees)-1000):length(prior.trees)]
post.trees <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/REAL_Macropodinae_ALL/Alternative_Dating_Schemes/Mean_Run_Combined1234.trees");
post.trees <- post.trees[(length(post.trees)-1000):length(post.trees)]
Choose which tips you want information for:
fossil_taxa <- c("Baringa_nelsonensis","Congruus_congruus",
"Kurrabi_mahoneyi","Macropus_pavana")
Get the node numbers of the tips
nodes <- sapply(fossil_taxa,function(x,y) which(y==x),y=tree$tip.label)
Then get the edge lengths for those nodes
edge.lengths <- setNames(tree$edge.length[sapply(nodes,
function(x,y) which(y==x),y=tree$edge[,2])],names(nodes))
The faster way is to make a function to do this:
get_terminal_branchlengths <- function(phy, tipnames){
## Get the node numbers of the tips
nodes <- sapply(tipnames,function(x,y) which(y==x),y=phy$tip.label)
## Then get the edge lengths for those nodes
edge.lengths <- setNames(phy$edge.length[sapply(nodes,
function(x,y) which(y==x),y=phy$edge[,2])],names(nodes))
return(edge.lengths)
}
Now that we’ve got the tips and branch lengths, we can compare the posterior to the prior
BFSA <- function(prior.phy, posterior.phy, tips){
post <- lapply(posterior.phy, get_terminal_branchlengths, tipnames=tips); names(post) <- NULL; post <- unlist(post)
prior <- lapply(prior.phy, get_terminal_branchlengths, tipnames=tips); names(prior)<- NULL; prior <- unlist(prior)
BFs <- NULL
for (j in 1:length(tips)){
curr.tip <- subset(post, names(post)==tips[j]);
probSA <- sum(curr.tip<=0); probTIP <- length(curr.tip)-probSA;
curr.tip <- subset(prior, names(prior)==tips[j]);
priorSA <- sum(curr.tip<=0); priorTIP <- length(curr.tip)-priorSA;
curr.BF <- log((probSA * priorTIP) / (probTIP * priorSA))
if(is.na(curr.BF)){curr.BF <- 0}
#curr.BF <- log(probSA/(length(curr.tip)-probSA))
names(curr.BF) <- tips[j]; curr.BF <- round(curr.BF, 2)
BFs <- append(BFs, curr.BF)
}
return(BFs)
}
Orient the data appropriately
macro_BFs <- BFSA(prior.trees, post.trees, tips=fossil_taxa)
macro_BFs <- as.data.frame(macro_BFs) # make the vector a data frame
macro_BFs[which(macro_BFs$macro_BFs > 5),] <- 5 # change any really big (INF) numbers to 5
macro_BFs[which(macro_BFs$macro_BFs < -5),] <- -5 # change any really small (-INF) numbers to -5
macro_BFs$taxa <- rownames(macro_BFs); # create column with the taxon names
macro_BFs <- macro_BFs[order(macro_BFs$macro_BFs),] # reorder by BF values
# set colors for plotting
macro_BFs$color <- "black";
#macro_BFs[which(macro_BFs$macro_BFs > 1),]$color <- "#3d98d3"
macro_BFs[which(macro_BFs$macro_BFs < -1),]$color <- "#FF7175"
macro_BFs$taxa <- factor(macro_BFs$taxa, levels=c(macro_BFs$taxa)) # set factors for plotting (NOT NECESSARY)
Then plot the data
ggplot(macro_BFs, aes(x=taxa, y=macro_BFs, label=macro_BFs)) +
geom_ribbon(aes(ymin=-1, ymax=+1)) +
geom_point(stat='identity', size=8, color=macro_BFs$color) +
# geom_segment(aes(y = 0,
# x = taxa,
# yend = macro_BFs,
# xend = taxa),
# color = macro_BFs$color) +
geom_text(color="white", size=2) +
#labs(title="Bayes Factor Support", subtitle="for Fossil Taxa as Sampled Ancestors") +
#ylim(-5, 5) +
scale_y_continuous(name="log Bayes Factors", limits=c(-5,5), breaks=c(-5:5)) +
scale_x_discrete(limits = rev(unique(sort(macro_BFs$taxa)))) + # drop this if you want to order it differently
#theme(panel.background=element_blank()) +
geom_hline(yintercept=-1) +
geom_hline(yintercept=1) +
xlab("Fossil Taxa") +
#ylab("log Bayes Factors") +
theme_classic() +
#theme(axis.text.y=element_blank(), axis.title.y=element_blank()) +
coord_flip()
Create a function to pull the ages of each fossil taxon estimated
get.fossil.ages <- function(fossil.tips, trees){
tree.tables <- lapply(trees, print.tree)
fossil.tables <- lapply(1:length(tree.tables), function(x) {
subset(tree.tables[[x]], tree.tables[[x]]$label %in% fossil.tips)
})
fossil.ages <- lapply(1:length(fossil.tables), function(x) {
select(fossil.tables[[x]], label, time_bp)
})
final <- bind_rows(fossil.ages)
}
Need to create a function called “print.tree” that I borrowed some code from biogeoBEARS
Now pull out the info on those fossils
my.test <- get.fossil.ages(fossil.tips = fossil_taxa, trees = post.trees)
(ggplot(my.test, aes(x=time_bp, y=label, fill=..x..))
+ scale_fill_gradientn(colours=wes_palette("Zissou1"))
+ geom_density_ridges_gradient(scale=1.5)
+ scale_x_reverse()
+ theme_classic())
## Picking joint bandwidth of 0.6